#! C:/programme/perl5
use warnings;
use strict;
use diagnostics;
use HTML::FormatText;
# Create (or truncate) the result file and write the tab-separated header row.
# The data rows are appended to the same file further down in the script, so
# the column order here must match the order used when the rows are printed.
# A lexical handle and three-argument open are used so the mode can never be
# influenced by the file name, and close is checked because buffered write
# errors only surface there.
open my $header_fh, '>', 'D:/prelex2013/proposalsinfo.dat' or die "Problem: $!";
print {$header_fh} join("\t",
    'Proposal ID',
    'Type',
    'Type of decision',
    'Procedure',
    'Legal Basis',
    'Field of activity',
    'DG primarily responsible',
    'DG jointly responsible',
    'DG associated',
    'Adoption by Commission',
    'Commission Decision Mode',
    'Withdrawal or Replacement date',
    'Transmission to Council',
    'Council Decision mode',
    'Adoption Council',
    'Withdrawal or Replacement',
    'Change of legal basis',
    'Reading',
    'B Point',
), "\n" or die "Problem: $!";
close $header_fh or die "Problem: $!";
# Script-wide state.  These stay package variables ("our") because the rest
# of the script refers to them by these exact names.
our $count=1;      # running number of the file being processed (progress output)
our $ch;           # not used in the visible part of the script
our $ch_count;     # not used in the visible part of the script
our $proposal;     # identifier of the proposal currently being processed

# Read the list of HTML files to process: one path per line, line endings
# still attached (they are chomped when each entry is processed).
# Three-argument open with a lexical handle avoids the bareword INPUT handle
# that shared its name with the @INPUT array.
open my $list_fh, '<', 'D:/prelex2013/locationall.txt' or die "Problem: $!";
our @INPUT=<$list_fh>;
close $list_fh;
# Main loop: for every HTML file listed in @INPUT, strip the markup with
# HTML::FormatText, pull the proposal attributes out of the resulting plain
# text with regular expressions, and append one tab-separated row to
# proposalsinfo.dat.  The column order of the row matches the header row of
# that file.  Fields that cannot be found are written as 'missing' (or
# 'not applicable' where noted below).

# Returns the date that directly follows "<label>\n" in $text, reformatted
# from dd-mm-yyyy to dd.mm.yyyy, or undef when the label/date pair is absent.
# $label is a compiled pattern (qr//) for the text preceding the date.
my $date_after = sub {
    my ($text, $label) = @_;
    return unless $text =~ /$label\n\s*([0-9]{2})-([0-9]{2})-([0-9]{4})/;
    return "$1.$2.$3";
};

# Council adoption events and the reading they stand for.  They are checked
# in this order and a later match overrides an earlier one.
my @council_steps = (
    [ qr/Formal\sadoption\sby\sCouncil/,     1 ],
    [ qr/Council\sapproval\s1.\srdg/,        1 ],
    [ qr/Council\sapproval\s2.\srdg/,        2 ],
    [ qr/Council\sdecision\sat\s3.\srdg/,    3 ],
);

# The result file is opened once in append mode (the header row is already in
# it) instead of being reopened for every single input file.
open my $out_fh, '>>', 'D:/prelex2013/proposalsinfo.dat' or die "Problem: $!";

foreach my $input (@INPUT) {
    chomp $input;
    # A blank line in the list (typically a trailing one) is not a file name.
    next if $input eq '';

    # Checking that file exists and is readable before handing it to the
    # formatter; three-argument open keeps odd characters in the path from
    # being interpreted as an open mode.
    open my $probe_fh, '<', $input or die "Cannot open '$input': $!";
    close $probe_fh;

    # Opening file, removing HTML and formatting it (might include line
    # breaks between parts that are next to each other on the webpage).
    my $content = HTML::FormatText->format_file($input);

    # Proposal identifier, e.g. "COM (2005) 12" or "COM (2005) 12 - 3".
    # Reset for every file so that a page without an identifier is reported
    # as 'missing' instead of silently inheriting the previous file's ID.
    $proposal = 'missing';
    if ($content =~ /(COM|SEC)\s\((\d{4})\)\s(\d{1,3})(.{0,5})/) {
        my ($id_type, $year, $number, $tail) = ($1, $2, $3, $4);
        $proposal = "$id_type ($year) $number";
        # An optional " - NN" right after the number is the sub-number.
        $proposal .= " - $1" if $tail =~ /\s-\s([0-9]{1,2})/;
    }

    # Responsible Commission services.
    my $d_primarilydg  = 'missing';
    my $d_associateddg = 'missing';
    # The jointly-responsible DG is currently never extracted; the pattern
    # that was tried is kept for reference:
    #   /Jointly\sresponsible(.*)(Associated|Mandatory)/
    my $d_jointlydg    = 'missing';
    $d_primarilydg  = $1 if $content =~ /Primarily\sresponsible\n\n(.*)\n/;
    $d_associateddg = $1 if $content =~ /Associated\n\n(.*)\n/;
    # Open question from the original author: is extra trimming needed when
    # "Primarily responsible" is not followed by two line breaks (i.e. when
    # "Jointly", "Associated", "Mandatory" or "Optional" ends up on the same
    # line)?  No such trimming is done at the moment.

    # Legal basis and fields of activity.  Fields of activity may span
    # several lines; the line breaks are turned into ';' so the value stays
    # on one row of the output.
    my $d_legalbasis      = 'missing';
    my $d_fieldofactivity = 'missing';
    $d_legalbasis      = $1 if $content =~ /Legal basis:\n\nCommission\s:(.*)\n/;
    $d_fieldofactivity = $1 if $content =~ /Fields\sof\sactivity:\n\n(.*?)\n\nLegal/s;
    $d_fieldofactivity =~ s/\n/;/g;

    # Procedure and type of file.  The decision type only exists for files
    # of the form "Proposal for a ...", hence the different default.
    my $d_procedure    = 'missing';
    my $d_type         = 'missing';
    my $d_decisiontype = 'not applicable';
    $d_procedure    = $1 if $content =~ /Procedures:\n\nCommission\s:\s(.*?)\s(Commission|Council|procedure|Type)/;
    $d_type         = $1 if $content =~ /Type\sof\sfile:\n\nCommission\s:\s(.*?)(\nCouncil|[0-9]|\sfor|\sComments)/;
    $d_decisiontype = $1 if $content =~ /Type\sof\sfile:\n\nCommission\s:\sProposal\sfor\sa\s(.*?)(\nCouncil|[0-9]|\sComments|\s|\n)/;

    # Decision modes.  The Council decision mode is currently never
    # extracted; the pattern that was tried is kept for reference:
    #   /Council\sDecision\smode:\s(.*)\n\s/
    my $d_com_decmod = 'missing';
    my $d_cou_decmod = 'missing';
    $d_com_decmod = $1 if $content =~ /Commission\sDecision\smode:\s(.*)\n\s/;

    # Commission adoption and transmission to the Council.
    my $d_com_adopt = 'missing';
    my $d_cou_start = 'missing';
    if (defined(my $date = $date_after->($content, qr/Adoption\sby\sCommission/))) {
        $d_com_adopt = $date;
    }
    if (defined(my $date = $date_after->($content, qr/Transmission\sto\sCouncil/))) {
        $d_cou_start = $date;
    }

    # Date of the common position.  NOTE: this value is extracted but is not
    # one of the columns written to the output file.
    my $d_cou_compos = 'missing';
    if ($content =~ /Adoption\scommon\sposition([0-9]{2})-([0-9]{2})-([0-9]{4})/) {
        $d_cou_compos = "$1.$2.$3";
    }

    # Council adoption date and the reading in which it happened.
    my $d_cou_adopt   = 'missing';
    my $d_cou_reading = 'missing';
    if ($content =~ /Signature\sby\sEP\sand\sCouncil/) {
        my $date = $date_after->($content, qr/Adopt.\sdeclaration\scomn\sposit./);
        if (defined $date) {
            $d_cou_adopt   = $date;
            $d_cou_reading = 2;
        }
    }
    foreach my $step (@council_steps) {
        my ($label, $reading) = @{$step};
        my $date = $date_after->($content, $label);
        next unless defined $date;
        $d_cou_adopt   = $date;
        $d_cou_reading = $reading;
    }

    # Withdrawal or replacement of the proposal.  A withdrawal date, when
    # present, takes precedence over a replacement date.
    my $d_com_withdrawdate = 'not applicable';
    my $d_com_withdraw     = 0;
    if ($content =~ /Replacement\s([0-9]{2})-([0-9]{2})-([0-9]{4})/) {
        $d_com_withdrawdate = "$1.$2.$3";
        $d_com_withdraw     = 1;
    }
    if (defined(my $date = $date_after->($content, qr/Withdrawal\sby\sCommission/))) {
        $d_com_withdrawdate = $date;
        $d_com_withdraw     = 1;
    }

    # Simple presence flags.
    my $d_changelegalbasis = 0;
    $d_changelegalbasis = 1 if $content =~ /Change\sof\slegal\sbasis/;
    my $d_cou_bpoint = '0';
    $d_cou_bpoint = '1' if $content =~ /POINT\s\"B\"\sOJ\sCOUNCIL/;

    print "$count - Writing $proposal\n";
    print {$out_fh} join("\t",
        $proposal,
        $d_type,
        $d_decisiontype,
        $d_procedure,
        $d_legalbasis,
        $d_fieldofactivity,
        $d_primarilydg,
        $d_jointlydg,
        $d_associateddg,
        $d_com_adopt,
        $d_com_decmod,
        $d_com_withdrawdate,
        $d_cou_start,
        $d_cou_decmod,
        $d_cou_adopt,
        $d_com_withdraw,
        $d_changelegalbasis,
        $d_cou_reading,
        $d_cou_bpoint,
    ), "\n" or die "Problem: $!";
    $count++;
}

# Buffered write errors only show up when the handle is closed.
close $out_fh or die "Problem: $!";